#Umeda, Michio (2022) "Dyadic Representation in Parliamentary Democracy in Japan" 
#Japanse Journal of Political Science
#The data management code is available from the author upon request

#The outcomes would be slightly different from those in the article 
#due to the Bayesian estimation of the candidate position, 
#which returns a slightly different estimation every time.

#install.packages("lme4")
#install.packages("interplot")

library(lme4)
library(interplot)

cand0312 <- read.csv("cand.data_2003-2012.csv")
smd0312 <- read.csv("dist.data_2003-2012.csv")
census300 <- read.csv("census300.csv", header = T)


###ANALYSIS

##This replicate Table 1 with minor difference
result01 <- lm(policyscore ~ res + factor(partyaff), 
           data=cand0312)
summary(result01)

qt <- quantile(census300$res, .25)*result01$coefficients[2] - quantile(census300$res, .75)*result01$coefficients[2]
ten <- quantile(census300$res, .10)*result01$coefficients[2] - quantile(census300$res, .90)*result01$coefficients[2]

qt/result01$coefficients[3]
ten/result01$coefficients[3]

mean(census300$res)
sd(census300$res)

mean(cand0312$policyscore)
sd(cand0312$policyscore)

sd(census300$res)*result01$coefficients[2]
(sd(census300$res)*result01$coefficients[2])/sd(cand0312$policyscore)

quantile(census300$res, .77)

##This replicate Table 2
result02 <- 
  lmer(margin ~ psd*res
       + encand
       + factor(year)  
       + (1|distcode) + (1|name.LDP) + (1|name.DPJ), 
       data=smd0312
  )
summary(result02)

##Figures
library(ggplot2)

#Figure 1. LDP/DPJ candidate urban-rural policy score.
fig1 <- ggplot() +
  theme_bw() + 
  geom_histogram(aes(x=cand0312$policyscore[cand0312$partyaff==2], 
                     y=-..count..), 
                 fill = "#ccc8c3", color = "black", 
                 breaks=seq(-3, 3, 1/3)) + 
  geom_histogram(aes(x=cand0312$policyscore[cand0312$partyaff==1], 
                     y=..count..), 
                 fill = "white", color = "black", 
                 breaks=seq(-3, 3, 1/3)) + 
  geom_label(aes(x = 2, y = 75, label = "LDP"), color = "black") +                       # ???ϓI?ɗ??����₷???悤?Ƀ??x???̐F?Ɩ_?̓h???Ԃ??̐F???????ɂȂ??悤?Ɏw??
  geom_label(aes(x = 1.5, y = -50, label = "DPJ"), color = "black", fill = "#ccc8c3") +
  labs(x = "Urban-Rural Policy Score", y = "Number of Candidates", 
       title = "Figure 1. LDP/DPJ candidate urban-rural policy score.") + 
  theme(text = element_text(family = "serif"))

#Figure 2
ggplot(census300, aes(x=res)) +
  theme_bw() + 
  geom_histogram(colour = "black", fill = "white", 
                 binwidth = 0.02, boundary = 0) +
  labs(x = "Proportion of Employment in Agriculture, Forestry, Fishery, and Construction", 
       y = "Number of Districts", 
       title = "Figure 2. Employment structure in Lower House districts.",
       caption = "Note: 2003-2012 district boundaries, based on 2010 Census.") +
  theme(text = element_text(family = "serif"))


#Figure 3. Candidate policy score and district employment structure.
ggplot(cand0312,
       aes(y=policyscore, x=res, size=(I(victory>0)+1)*2)) + 
  geom_point(aes(shape = factor(partyaff)), position="jitter", alpha=0.4) + 
  theme_bw() + 
  labs(shape="Party Affiliation") +
  labs(size="# of SMD Victory") +
  scale_shape_manual(values = c(1, 19), labels = c("LDP", "DPJ")) +
  labs(x="Proportion of Employment in Agriculture, Forestry, Fishery and Construction", 
       y="Urban-Rural Policy Score",
       title = "Figure 3. Candidate policy score and district employment structure.") +
  geom_smooth(method = "loess", 
              data = subset(cand0312, cand0312$partyaff==1), 
              se=F, colour = "gray30", size = 1.25, alpha = 0.25) + 
  geom_smooth(method = "loess", 
              data = subset(cand0312, cand0312$partyaff==2), 
              se=F, colour = "gray20", size = 1.25, alpha = 0.25) +
  theme(text = element_text(family = "serif"))


#Figure 4
fig4 <- interplot(m = result02, var1 = "psd", var2 = "res", 
                  hist = F, adjCI = TRUE) +
  geom_hline(yintercept = 0, linetype = "dashed") + 
  ggtitle("Figure 4. Marginal effects of policy differnce for the electoral outcomes.") +
  xlab("Proportion of Employment in Agriculture, Forestry, Fishery and Construction") +
  ylab("Marginal Effect of the Candidates' Policy Score Difference") +  
  theme_bw() + 
  theme(text = element_text(family = "serif"))


#simulation for Figure 5
ldp25 <- quantile(cand0312$policyscore[cand0312$partyaff==1], .25, na.rm=T)
ldp50 <-median(cand0312$policyscore[cand0312$partyaff==1], na.rm=T)
ldp75 <-quantile(cand0312$policyscore[cand0312$partyaff==1], .75, na.rm=T)
dpj50 <- median(cand0312$policyscore[cand0312$partyaff==2], na.rm=T)

emp10 <- quantile(census300$res, .10)
emp25 <- quantile(census300$res, .25)
emp50 <- quantile(census300$res, .50)
emp75 <- quantile(census300$res, .75)
emp90 <- quantile(census300$res, .90)

#both at median
median_result02 <- c(
  (ldp50-dpj50)*summary(result02)$coefficients[2,1] + 
    emp10*summary(result02)$coefficients[3,1] + 
    (ldp50-dpj50)*ldp50*emp10*summary(result02)$coefficients[8,1],
  
  (ldp50-dpj50)*summary(result02)$coefficients[2,1] + 
    emp25*summary(result02)$coefficients[3,1] + 
    (ldp50-dpj50)*ldp50*emp25*summary(result02)$coefficients[8,1],
  
  (ldp50-dpj50)*summary(result02)$coefficients[2,1] + 
    emp50*summary(result02)$coefficients[3,1] + 
    (ldp50-dpj50)*ldp50*emp50*summary(result02)$coefficients[8,1],
  
  (ldp50-dpj50)*summary(result02)$coefficients[2,1] + 
    emp75*summary(result02)$coefficients[3,1] + 
    (ldp50-dpj50)*ldp50*emp75*summary(result02)$coefficients[8,1],
  
  (ldp50-dpj50)*summary(result02)$coefficients[2,1] + 
    emp90*summary(result02)$coefficients[3,1] + 
    (ldp50-dpj50)*ldp50*emp90*summary(result02)$coefficients[8,1]
)

#LDP moves to 25 percentile
ldp25_result02 <- c(
  (ldp25-dpj50)*summary(result02)$coefficients[2,1] + 
    emp10*summary(result02)$coefficients[3,1] + 
    (ldp25-dpj50)*ldp25*emp10*summary(result02)$coefficients[8,1],
  
  (ldp25-dpj50)*summary(result02)$coefficients[2,1] + 
    emp25*summary(result02)$coefficients[3,1] + 
    (ldp25-dpj50)*ldp25*emp25*summary(result02)$coefficients[8,1],
  
  (ldp25-dpj50)*summary(result02)$coefficients[2,1] + 
    emp50*summary(result02)$coefficients[3,1] + 
    (ldp25-dpj50)*ldp25*emp50*summary(result02)$coefficients[8,1],
  
  (ldp25-dpj50)*summary(result02)$coefficients[2,1] + 
    emp75*summary(result02)$coefficients[3,1] + 
    (ldp25-dpj50)*ldp25*emp75*summary(result02)$coefficients[8,1],
  
  (ldp25-dpj50)*summary(result02)$coefficients[2,1] + 
    emp90*summary(result02)$coefficients[3,1] + 
    (ldp25-dpj50)*ldp25*emp90*summary(result02)$coefficients[8,1]
)

#LDP moves to 75 percentile
ldp75_result02 <- c(
  (ldp75-dpj50)*summary(result02)$coefficients[2,1] + 
    emp10*summary(result02)$coefficients[3,1] + 
    (ldp75-dpj50)*ldp75*emp10*summary(result02)$coefficients[8,1],
  
  (ldp75-dpj50)*summary(result02)$coefficients[2,1] + 
    emp25*summary(result02)$coefficients[3,1] + 
    (ldp75-dpj50)*ldp75*emp25*summary(result02)$coefficients[8,1],
  
  (ldp75-dpj50)*summary(result02)$coefficients[2,1] + 
    emp50*summary(result02)$coefficients[3,1] + 
    (ldp75-dpj50)*ldp75*emp50*summary(result02)$coefficients[8,1],
  
  (ldp75-dpj50)*summary(result02)$coefficients[2,1] + 
    emp75*summary(result02)$coefficients[3,1] + 
    (ldp75-dpj50)*ldp75*emp75*summary(result02)$coefficients[8,1],
  
  (ldp75-dpj50)*summary(result02)$coefficients[2,1] + 
    emp90*summary(result02)$coefficients[3,1] + 
    (ldp75-dpj50)*ldp75*emp90*summary(result02)$coefficients[8,1]
)

cbind(median_result02, ldp75_result02, ldp25_result02)
